*****
***** HEADER
*****


     *** Contact information

        * Michael Punzalan/Emi Terasawa/Elena Prager
        * Department of Health Care Management
        * The Wharton School
        * University of Pennsylvania
        * 3641 Locust Walk
        * Philadelphia, PA 19104
        * mpunz@wharton.upenn.edu/emit@wharton.upenn.edu/elprager@wharton.upenn.edu
        * 267-507-4358/215-779-0717/408-666-0653


    *** Notes
	
		* This do-file imports raw coronary data from MRG for the US and appends various data years.
		* It then changes / merges / deletes stent names as needed.
		* The next do-file to be run is cleaning-us-data-2-sample.do, which cleans the appended-together data extract based on data quality.
		* Both files are based on the Jun 2014 version of cleaning-us-data.do, plus moving downstream cleaning code up into these files.
		
		* Notes on data files
			* CSV data files imported in this do-file were exported from original MRG Excel spreadsheets sheet-by-sheet.
				* Exceptions: USdevices_old.dta (device utilization data starting in 2004) was created by create-USdevices-old.do; USprocedures_2007m6.dta by create-USprocedures-old.do
			* First sheet in MRG spreadsheets is devices, second sheet is diagnostic procedures.
			* Files named *_2007m6 are the oldest (Jan 2004 - Jun 2007), *_2012m7 are intermediate (Jan 2005 - Jul 2012), and *_2013Q2 are the newest (Jan 2006 - Jun 2013)


*****
***** PRELIMINARIES
*****



    * Set program-specific directory options
    * The working directory is the folder one level above _data, _documents, etc.
    * The program name is the name of the DO file, minus ".do"
    * Machine-specific alternatives for the working directory are kept commented out
    * global dirwork      "/home/hcrm/mpunz/Dropbox/IntlMedDevMkts/_temp-mike"
    * global dirwork  "/Users/Ellie/Dropbox/RA_work/IntlMedDevMkts/_temp-mike"
    * global dirwork  "/Users/eprager/Dropbox/RA_work/IntlMedDevMkts/_temp-mike"
    global dirwork      "~/Dropbox/Research/IntlMedDevMkts/_temp-mike"
    global progname     "cleaning-us-data"

    * The program and output folders are each assigned to a unique global
    global dirdocuments "$dirwork/_documents"
    global dirresults   "$dirwork/_results"
    global dirprograms  "$dirwork/_programs"
    global dirscratch   "$dirwork/_scratch/$progname"

    * The original and analysis subfolders are each assigned to separate globals
    * This provides some protection against accidentally destroying the original data
    global dirodata     "$dirwork/_data/original"
    global diradata     "$dirwork/_data/analysis"

    * Create scratch folder if necessary (capture: it usually exists already)
    * Paths are quoted so that a working directory containing spaces still works
    capture mkdir "$dirscratch"

    * Close any log left open by a previous run, then start a fresh text log
    capture log close
    log using "$dirscratch/log.txt", replace text

    *** Change working directory
    cd "$dirwork"

    * Do not pause output at -more- prompts
    set more off
	
	
*****
***** EXECUTION
*****


	
*****
***** IMPORT DIAGNOSTIC PROCEDURES DATA
*****

***
*** 2004 DATA
***

	*** Load the oldest diagnostic angiography extract (the *_2007m6 file)
	use $dirodata/US/USprocedures_2007m6.dta, clear

	*** Harmonize the lab identifier name with the later extracts
	rename institution_id lab_id

	*** Remove exact duplicate rows, then display any remaining lab-month duplicates
	duplicates drop
	duplicates list lab_id mm yyyy
	* Discard the CathLab257 October 2005 row that reports zero angiographies
	drop if diagnosticangiography==0 & yyyy==2005 & mm==10 & lab_id=="CathLab257"

	*** Restrict to calendar year 2004 (rows with missing yyyy are removed as well)
	drop if yyyy!=2004

	*** Order by lab and month, then store in the scratch folder
	sort lab_id yyyy mm
	save $dirscratch/USprocedures_unclean_2004.dta, replace

***
*** 2005-2012 DATA
***

    *** Read the intermediate diagnostic angiography extract (CSV)
    insheet using $dirodata/US/USprocedures_2012m7.csv, comma names clear

    *** Harmonize variable names with the 2004 extract
    rename (institution_id diagangiography_procperf) (lab_id diagnosticangiography)

    *** Initial ordering (determines the observation numbers shown by the listing below)
    sort lab_id state_code region yyyy mm

    *** Display lab-month combinations that appear more than once
    duplicates list lab_id mm yyyy
    * Known duplicates (resolved later, once all extracts are appended):
    *   CathLab207, mm==1,  yyyy==2010: statistics_id 11981 vs 12019 -> 12019 looks off
    *   CathLab257, mm==10, yyyy==2005: statistics_id 4189  vs 4260  -> 4260 looks off
    *   CathLab327, mm==4,  yyyy==2011: statistics_id 14386 vs 14504 -> unclear which is incorrect

    *** Final ordering before stacking
    sort lab_id yyyy mm statistics_id

    *** Stack the 2004 observations underneath
    append using $dirscratch/USprocedures_unclean_2004.dta

    *** Store the 2004-2012 diagnostic data
    save $dirscratch/USprocedures_unclean_2012m7.dta, replace
	
***
*** 2012-2013 DATA
***
	
	*** Read the newest diagnostic angiography extract (CSV)
	insheet using $dirodata/US/USprocedures_2013Q2.csv, comma names clear

	*** Retain Aug 2012 onward only (the 2005-2012 extract already runs through Jul 2012)
	keep if yyyy>2012 | (yyyy==2012 & mm>=8)

	*** Harmonize names and keep only the variables shared with the earlier data
	rename (institution_id diagangiography_procperf) (lab_id diagnosticangiography)
	keep lab_id statistics_id region mm yyyy diagnosticangiography

	*** Ordering
	sort lab_id region yyyy mm

	*** Repair inconsistent capitalization of a lab identifier
	replace lab_id="CathLab574" if lab_id=="Cathlab574"

	*** Stack the 2004 and 2005-2012 observations underneath
	append using $dirscratch/USprocedures_unclean_2012m7.dta

	*** Resolve lab-months that carry more than one statistics_id.
	*** For each case, the duplicated month and its two neighbouring months are listed
	*** for comparison, and then one statistics_id is discarded.
		*	duplicates list lab_id yyyy mm
		local showvars lab_id mm yyyy statistics_id state_code diagnosticangiography

		* CathLab207, January 2010
		list `showvars' if lab_id=="CathLab207" & mm==1 & yyyy==2010
		list `showvars' if lab_id=="CathLab207" & mm==2 & yyyy==2010
		list `showvars' if lab_id=="CathLab207" & mm==12 & yyyy==2009
		drop if statistics_id==12019 & lab_id=="CathLab207" & mm==1 & yyyy==2010

		* CathLab257, October 2005
		list `showvars' if lab_id=="CathLab257" & mm==10 & yyyy==2005
		list `showvars' if lab_id=="CathLab257" & mm==11 & yyyy==2005
		list `showvars' if lab_id=="CathLab257" & mm==9 & yyyy==2005
		drop if statistics_id==4260 & lab_id=="CathLab257" & mm==10 & yyyy==2005

		* CathLab327, April 2011
		list `showvars' if lab_id=="CathLab327" & mm==4 & yyyy==2011
		list `showvars' if lab_id=="CathLab327" & mm==5 & yyyy==2011
		list `showvars' if lab_id=="CathLab327" & mm==3 & yyyy==2011
		drop if statistics_id==14504 & lab_id=="CathLab327" & mm==4 & yyyy==2011

	*** Store the full diagnostic data
	save $dirscratch/USprocedures_unclean_2013Q2.dta, replace
	
	

*****
***** IMPORT DEVICE USAGE DATA
*****


***
*** 2004 DATA
***
	
	*** Load the legacy device utilization file
	use $diradata/US/USdevices_old.dta, clear

	*** Restrict to calendar year 2004 (rows with missing yyyy are removed as well)
	drop if yyyy!=2004

	*** Harmonize variable names; manufacturer names are stored in upper case
	rename (institution_id product_name manufacturer_name units avgp) (lab_id product manufacturer q avgprice)
	replace manufacturer = strupper(manufacturer)

	*** Remove exact duplicate rows
	*duplicates report
	duplicates drop

	*** Order by lab and month, then store in the scratch folder
	gsort lab_id yyyy mm
	save $dirscratch/USdevices_unclean_2004.dta, replace


***
*** 2005-2012 DATA
***
	
	*** Read the intermediate device extract (CSV)
	insheet using $dirodata/US/USdevices_2012m7.csv, comma names clear

	*** Harmonize variable names; manufacturer names are stored in upper case
	rename (institution_id units product_name avgp manufacturer_name free_units) (lab_id q product avgprice manufacturer freeunits)
	replace manufacturer = strupper(manufacturer)

	*** Repair inconsistent capitalization of lab identifiers (target form: CathLab###)
	* All-caps variant
	replace lab_id="CathLab377" if lab_id=="CATHLAB377"
	* "Cathlab###" variants
	foreach id in 245 291 304 305 309 363 392 395 399 420 {
		replace lab_id="CathLab`id'" if lab_id=="Cathlab`id'"
	}
	* "cathLab###" variants
	foreach id in 094 217 {
		replace lab_id="CathLab`id'" if lab_id=="cathLab`id'"
	}
	* "cathlab###" variants
	foreach id in 041 094 160 174 205 234 319 {
		replace lab_id="CathLab`id'" if lab_id=="cathlab`id'"
	}

	*** Remove exact duplicate rows (evaluated before any variables are discarded)
	*duplicates report
	duplicates drop

	*** Keep the variables of interest and stack the 2004 observations underneath
	gsort lab_id yyyy mm statistics_id
	keep statistics_id yyyy mm quarter lab_id size state_code region group_name manufacturer compliant balloon product length diameter q revenue type_name avgprice platform_name freeunits
	append using $dirscratch/USdevices_unclean_2004.dta
	* privatepublic is not among the variables kept above, so it can only come from the appended 2004 file
	drop privatepublic

	*** Order and store the 2004-2012 device data
	gsort lab_id yyyy mm statistics_id
	save $dirscratch/USdevices_unclean_2012m7.dta, replace

***
*** 2012-2013 DATA
***
	
	*** Read the newest device extract (CSV)
	insheet using $dirodata/US/USdevices_2013Q2.csv, comma names clear

	*** Retain Aug 2012 onward only (the 2005-2012 extract already runs through Jul 2012)
	keep if yyyy>2012 | (yyyy==2012 & mm>=8)

	*** Harmonize names and formats with the earlier data (note: no avgprice variable in this extract)
	rename (institution_id units product_name manufacturer_name free_units) (lab_id q product manufacturer freeunits)
	replace manufacturer = strupper(manufacturer)

	* freeunits arrives as TRUE/FALSE text; recode to 1/0 and convert to numeric
	replace freeunits="0" if freeunits=="FALSE"
	replace freeunits="1" if freeunits=="TRUE"
	destring freeunits, replace

	* Rebuild quarter as the string Q#yy (e.g. quarter 3 of 2012 -> Q312) to match the 2005-2012 data
	gen qprefix = "Q"
	tostring yyyy, gen(yr2)
	replace yr2 = substr(yr2, 3, 2)
	egen qlabel = concat(qprefix quarter yr2)
	drop quarter qprefix yr2
	rename qlabel quarter

	* teaching arrives as Yes/No text; recode to 1/0 and convert to numeric
	replace teaching="1" if teaching=="Yes"
	replace teaching="0" if teaching=="No"
	destring teaching, replace

	*** Repair inconsistent capitalization of lab identifiers (target form: CathLab###)
	foreach id in 070 093 325 364 484 495 508 514 546 566 573 574 582 {
		replace lab_id="CathLab`id'" if lab_id=="Cathlab`id'"
	}

	*** Remove exact duplicate rows (evaluated before any variables are discarded)
	*duplicates report
	duplicates drop

	*** Stack the 2004-2012 data underneath (depending on RAM, may need to do this in opposite order to prevent crashing)
	gsort lab_id yyyy mm statistics_id
	keep statistics_id yyyy mm quarter lab_id region group_name manufacturer compliant balloon product length diameter q revenue type_name platform_name freeunits teaching
	* Alternative (opposite-order) append, kept for reference:
	*save $dirscratch/USdevices_unclean_2013Q2temp.dta, replace
	*clear
	*use $dirscratch/USdevices_unclean_2012m7.dta
	*append using $dirscratch/USdevices_unclean_2013Q2temp.dta
	append using $dirscratch/USdevices_unclean_2012m7.dta

	*** Order and store the full device data
	gsort lab_id yyyy mm statistics_id
	save $dirscratch/USdevices_unclean_2013Q2.dta, replace

	
	
*****
***** MERGE DIAGNOSTIC DATA INTO DEVICE UTILIZATION DATA
*****
	
	*** Start from the appended device usage data (master)
	use $dirscratch/USdevices_unclean_2013Q2.dta, clear

	*** Attach the lab-month diagnostic data (using); many device rows per lab-month
	merge m:1 lab_id yyyy mm using $dirscratch/USprocedures_unclean_2013Q2.dta

	*** Inspect unmatched observations, then keep matched lab-months only
	* NOTE(review): the original comment described this as a check of unmerged *master* obs.,
	* but _merge==2 selects observations found only in the *using* (diagnostic) data - confirm intent
	tab statistics_id if _merge==2, mis
	keep if _merge==3
	drop _merge

	*** Time index: consecutive integers over the (yyyy mm) pairs present in the data
	* NOTE(review): t=1 is Jan 2004 only if that month survives the merge, and any month
	* with no observations is skipped in the numbering - confirm
	egen t=group(yyyy mm)
	*tab yyyy mm if t==1

	*** Shorter name for the diagnostic angiography count
	rename diagnosticangiography diagnostic

	*** Stent category indicator: 0 = bare metal, 1 = drug eluting, missing for any other type_name
	gen     DES = 0 if inlist(type_name, "BMS", "Bare Metal")
	replace DES = 1 if inlist(type_name, "DES", "Drug Eluting")
	drop type_name

	*** Remove cutting balloons
	drop if balloon=="Cutting"

	*** Store dataset (to be written over in next section of code after product names editing)
	save $dirscratch/USdevicesprocedures_unclean_2013Q2.dta, replace

	
	
		
*****
***** CLEAN MANUFACTURER AND PRODUCT NAMES, OUTPUT TABLE
*****

    *** Report, then remove, exact duplicate rows
		duplicates report
		duplicates drop

	*** Restrict to the coronary stent product group
		keep if group_name=="Coronary Stents"

	*** Remove stent grafts
		drop if inlist(product, "Jomed Jostent Graftmaster", "Jostent", "Jostent Graftmaster", "Jostent Stent Graft", "Stent graft")
		drop if inlist(product, "Magic Wallstent", "Radius", "Radius Stent", "BeStent 2", "AneuGraft", "GraftMaster")

	*** Remove products that are not coronary stents
		drop if manufacturer=="INLAY" | inlist(product, "Carotid Wallstent Monorail", "Crossail Balloon", "Procomm", "RX Herculink", "NIR Elite/NIRoyal Elite Monorail", "NIR Elite/NIRoyal Elite")
		* Note for "Symbiot Covered Stent System": used specifically in saphenous (leg) veins that have been grafted into the heart in CABG
		drop if product=="Symbiot Covered Stent System"

	*** Standardize manufacturer names, including M&A activity
		* International Biomedical Systems is abbreviated to IBS
		replace manufacturer="IBS" if manufacturer=="INTERNATIONAL BIOMEDICAL SYSTEMS"
		* M&A activity: Abbott Vascular is recorded as Guidant, Sorin as CID Vascular
		* NOTE(review): after this line no observation has manufacturer "ABBOTT VASCULAR",
		* so any later condition that tests for that name cannot match
		replace manufacturer="GUIDANT" if manufacturer=="ABBOTT VASCULAR"
		replace manufacturer="CID VASCULAR" if manufacturer=="SORIN"
		* Sahajanand Medical Technologies: restore the missing leading letter
		replace manufacturer = "SAHAJANANDS MEDICAL TECHNOLOGIES" if manufacturer == "AHAJANANDS MEDICAL TECHNOLOGIES"
		* UK Medical is recorded as Minvasys
		replace manufacturer="MINVASYS" if manufacturer=="UK MEDICAL"
		
    *** Standardize bare-metal stent (BMS) product names
        * Abbott Vascular / Guidant
        * NOTE(review): the four replacements below test manufacturer=="ABBOTT VASCULAR", which is
        * recoded to "GUIDANT" earlier in this file, so as written they match no observations.
        * Enabling them would change results downstream (e.g. "Frontier" would no longer be
        * renamed "Frontier Bifurcation", and "Vision" would no longer pool with "Mini Vision"),
        * so they are left untouched here - confirm the intended behavior before changing.
        replace product="Multilink Frontier" if manufacturer=="ABBOTT VASCULAR" & product=="Frontier"
        replace product="Jomed Jostent Graftmaster" if manufacturer=="ABBOTT VASCULAR" & product=="Jostent Stent Graft"
        replace product="Multilink Vision" if manufacturer=="ABBOTT VASCULAR" & product=="Vision"
        replace product="Josonics Flex Stent" if manufacturer=="ABBOTT VASCULAR" & product=="Josonics Flex stent"
        replace product = "Jostent Flexmaster" if product == "Jostent Bifurcation and Sidebranch stent"

        * B. Braun: strip the mis-encoded trademark character from Coroflex names
        replace product="Coroflex Blue" if manufacturer=="B. BRAUN" & product=="CoroflexÆ Blue"
        replace product="Coroflex Delta" if manufacturer=="B. BRAUN" & product=="CoroflexÆ Delta"
        replace product="Coroflex Theca" if manufacturer=="B. BRAUN" & product=="CoroflexÆ Theca"
        replace product = "Coroflex Blue" if product == "Coroflex Blue Ultra"

        * Biotronik: "Lekton" and "Motion" are both recorded as "Lekton Motion"
        replace product="Lekton Motion" if manufacturer=="BIOTRONIK" & inlist(product, "Lekton", "Motion")

        * Minvasys
        replace product="Amazonia CroCo" if manufacturer=="MINVASYS" & product=="Amazonia"

        * Translumina
        replace product="Yukon Plus" if manufacturer=="TRANSLUMINA" & product=="Not Classified BMS"

        * Pool product variants under a common family name
        replace product = "BiodivYsio" if inlist(product, "BiodivYsio OC", "BiodivYsio SV")
        replace product = "Bx"         if inlist(product, "Bx Sonic", "Bx Velocity")
        replace product = "Vision"     if product=="Mini Vision"
        replace product = "Multilink"  if inlist(product, "Penta", "Pixel", "Tetra", "Ultra", "Zeta", "Multi-Link 8")
        replace product = "Driver"     if inlist(product, "Micro-Driver", "Driver OTW", "Driver Sprint")
        replace product = "S Series"   if inlist(product, "S660", "S670", "S7", "S660 Zipper MX", "S7 Zipper MX")
        * The double spaces inside some of the Hepacoat names below are as they appear in the data
        replace product = "Bx Hepa"    if inlist(product, "Bx Velocity  w/ Hepac", "Bx Velocity  w/ Hepacoat", "Bx Sonic W/Hepacoat", "Bx Velocity w/ Hepacoat")
        replace product = "Arthos"     if inlist(product, "Arthos Inert", "Arthos Pico")
        replace product = "Helistent"  if inlist(product, "Helistent Classique", "Helistent Titan")
        replace product = "Xtrm-Track" if product=="Xtrm-Tracky"
        replace product = "Titan2"     if inlist(product, "Titan2 Xlarge", "Titan2 Xsmall")

	*** Standardize drug-eluting stent (DES) product names
		* B. Braun: strip the mis-encoded trademark character
		replace product="Coroflex Please" if manufacturer=="B. BRAUN" & product=="CoroflexÆ Please"
		* Boston Scientific: unclassified DES is recorded as Taxus Express2
		replace product="Taxus Express2" if manufacturer=="BOSTON SCIENTIFIC" & product=="Not Classified DES - BSX"
		* Other stent names
		replace product = "Janus Carbostent DES" if DES==1 & product=="Janus Carbostent"
		replace product="Taxus Liberte" if product=="Taxus Liberte Long"
		replace product="Taxus Element" if manufacturer=="BOSTON SCIENTIFIC" & product=="ION"
		* Original note: "Mirroring change made in US file here (0 obs. of these in EU)"
		* NOTE(review): that wording appears to have been written for the EU do-file - confirm
		replace product = "Taxus Express2" if inlist(product, "Taxus", "Taxus Express")
		* Normalize capitalization of Xience Xpedition
		replace product="Xience Xpedition" if product=="XIENCE Xpedition"
		* Igaki-Tamai stent is flagged as DES (technically it is bio-absorbable, but also DES)
		replace DES=1 if product=="IGAKI-TAMAI STENT"

	*** Append "Bifurcation" to the names of bifurcation stents, then build an indicator
		foreach stent in "Frontier" "Nile" "Nile Croco" "Nile Pax" "Sideguard" "Sidekick" "Twin Rail" {
			replace product="`stent' Bifurcation" if product=="`stent'"
		}
		* Indicator equals 1 when the product name contains "Bifurcation", 0 otherwise
		gen bifurcation = regexm(product, "Bifurcation")

	*** Other changes
		* V-Flex Plus is flagged as DES
		replace DES=1 if product=="V-Flex Plus"
		* Changing Guidant's Absorb DES indicator to 1 (from missing); these were stents used in a clinical trial
		replace DES=1 if product=="Absorb" & inlist(manufacturer, "GUIDANT", "ABBOTT VASCULAR")

	*** Pool products that are treated as the same stent
		* R-Stent/R-Stent Evolution2 -> R-Stent (based on Endeavor Sprint logic)
		replace product = "R-Stent" if product == "R-Stent/R-Stent Evolution2"

		* Sprint is a balloon, Endeavor Sprint is Endeavor DES mounted on Sprint balloon.
		* Since Sprint is just a delivery system, Endeavor and Endeavor Sprint are pooled.
		replace product = "Endeavor" if inlist(product, "Sprint", "Endeavor Sprint")

		* Pool Jomed Flexmaster, Jostent Flexmaster and FlexMaster F1 (counts shown first)
		count if product == "Jomed Flexmaster"
		count if product == "Jostent Flexmaster"
		replace product = "Jomed Flexmaster" if inlist(product, "Jostent Flexmaster", "FlexMaster F1")

		* Pool Liberte and VeriFlex (Liberte) (counts shown first)
		count if product == "VeriFlex (Liberte)"
		count if product == "Liberte"
		replace product = "Liberte" if product == "VeriFlex (Liberte)"

		* Taxus Atom variants: the first is based on an explicit size extension, the second on naming pattern
		replace product = "Taxus Express2" if product == "Taxus Express Atom"
		replace product = "Taxus Liberte" if product == "Taxus Liberte Atom"

		* Not hard evidence, but based on other naming patterns for Boston Scientific, this change seems warranted
		replace product = "Promus" if product == "Promus 2.25"

		* Combination of homepage name evidence (Xience Prime) and product specifications (Xience Nano)
		replace product = "Xience V" if product == "Xience Nano"

		* Genius is pooled into Genius Megaflex
		replace product="Genius Megaflex" if product=="Genius"

		* Tecnic Carbostent is pooled into Tecnic Carbostent Plus
		replace product="Tecnic Carbostent Plus" if product=="Tecnic Carbostent"

		* Sirius Carbostent is pooled into Syncro Carbostent (which is just a new delivery system)
		replace product="Syncro Carbostent" if product=="Sirius Carbostent"

		* Rithron is pooled into Tenax
		replace product="Tenax" if product=="Rithron"


		
    *** Aggregate over pooled stents and over all sizes
		* Variables that define one output row (order kept as in the collapse below)
		local cellvars t yyyy mm lab_id manufacturer product DES bifurcation teaching diagnostic state_code region

		* Discard everything except the cell identifiers, statistics_id and the measures
		keep `cellvars' statistics_id revenue q avgprice

		* One row per cell: revenue and units are summed, avgprice is averaged
		collapse (sum) revenue q (mean) avgprice, by(`cellvars')
		label variable revenue "Revenue"
		label variable q "# of Units"

		* Unit price implied by revenue and units
		* NOTE(review): the original comment gave the currency as euros; this is US data - confirm
		gen p = revenue / q

	*** Store dataset (overwrites the pre-cleaning version of this file)
		save $dirscratch/USdevicesprocedures_unclean_2013Q2.dta, replace
		
* don't forget to output table as requested
	
	
	
	

	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
******** GET US APPROVAL DATES!!!	see us-products-ellie.xls
	
	
	
	
		
*****
***** GENERATING TABLE OF KEPT PRODUCTS
*****

	* Reload the cleaned, collapsed US data.
	* The use command accepts the clear option, not replace; with replace it stops with a syntax error.
	use "$dirscratch/USdevicesprocedures_unclean_2013Q2.dta", clear

*** Make variable for empirical US entry date, collapse down
	* First period in which each manufacturer-product appears in the US data
	bysort manufacturer product: egen t_enter_US = min(t)
	keep manufacturer product DES t t_enter_US yyyy mm
	duplicates drop
	* Keep only the row(s) from the entry period, so yyyy and mm describe the entry date
	keep if t==t_enter_US
	drop t
	rename yyyy yyyy_enter_US
	rename mm mm_enter_US

*** Merge to uncleaned EU data
	* Products found in only one of the two datasets are retained (no filter on _merge)
	merge 1:m manufacturer product DES using "$dirwork/_scratch/cleaning-eu-data/EUdevicesprocedures_unclean_vintage_2013Q2.dta"

	* Reduce to one row per distinct product / vintage / entry-date combination
	keep manufacturer product DES vintage t_enter_US yyyy_enter_US mm_enter_US
	duplicates drop

*** Order and sort, output
	keep manufacturer product DES vintage t_enter_US
	order manufacturer product DES vintage t_enter_US
	la var vintage "EU vintage (checked)"
	la var t_enter_US "US empirical entry"
	sort manufacturer product

	* Variable labels (where defined) are written as the header row
	export excel using "$dirscratch/product_list_checks.xls", firstrow(varlabels) replace
